\documentclass{llncs}

%\usepackage{llncsdoc}

%\usepackage{makeidx}  % allows for indexgeneration
\usepackage{graphicx}
\usepackage[T1]{fontenc}
\usepackage[english]{babel}
\usepackage[utf8]{inputenc}

\usepackage{paralist}


%%%Math
\usepackage{latexsym}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsthm}
\usepackage{eurosans}

\usepackage{eurosym}

\usepackage{longtable}

\usepackage{listings}

\usepackage{color}

\definecolor{darkred}{rgb}{0.5, 0, 0}
\definecolor{violet}{rgb}{1, 0, 1}
\definecolor{green}{rgb}{0.3, 0.95, 0.3}
\definecolor{listinggray}{gray}{0.97}



\begin{document}

\title{Semantic Methods for Reusing Linking Open Data of the European Public Procurement Notices\thanks{
This work is part of the `10ders Information Services' project, partially funded by the Spanish Ministry of
Industry, Commerce and Tourism, led by `Gateway Strategic Consultancy
Services' and developed in cooperation with `EXIS TI' (\url{http://www.exis-ti.com/}) and WESO Research Group.}}

\titlerunning{Semantic Methods for Reusing Linking Open Data of the European Public Procurement Notices}

\author{Jose Mar\'{i}a \'{A}lvarez\inst{1} \and Jos\'{e} Emilio Labra\inst{1} \and \'{A}ngel Mar\'{i}n\inst{2} \and Jos\'{e} Luis Mar\'{i}n\inst{2}} 


\authorrunning{Jose Mar\'{i}a \'{A}lvarez and Jos\'{e} Emilio Labra and \'{A}ngel Mar\'{i}n and Jos\'{e} Luis Mar\'{i}n}


\tocauthor{Jose Mar\'{i}a \'{A}lvarez and Jos\'{e} Emilio Labra and \'{A}ngel Mar\'{i}n and Jos\'{e} Luis Mar\'{i}n}


\institute{WESO Research Group-Universidad de Oviedo\\
 \and Gateway Strategic Consultancy Services \\
 \email{{josem.alvarez@josem.alvarez.es}},
 \email{{labra@uniovi.es}}, \email{{anmar@gateway-scs.es}}, \email{{josmar@gateway-scs.es}}\\  
  WWW home page: \texttt{http://purl.org/weso}, \\ \texttt{http://gateway-scs.es/}
}


\date{7 January 2011}

\maketitle

\renewcommand{\labelitemi}{$\bullet$}

\begin{abstract}
The aim of this paper is to show the activities to be performed
in the development of a PhD on e-procurement using linking open data. The study will focus on: 1) modeling
the unstructured information included in public procurement notices (organizations, contracting authorities, contracts awarded, etc.)
using semantic web technologies; 2) enriching that information with the existing product
classification systems and the linked data vocabularies; 3) publishing relevant information extracted from the notices following the linking open data approach;
4) exploiting the information through advanced algorithms providing value-added services to users, with special focus on SMEs. Finally, an evaluation
methodology is outlined to validate the quality and the improvement of the proposed system with respect to the existing ones.
\end{abstract}




\section{Introduction}
% FIXME: Problem: describe the core problem of the PhD and motivate its relevance for the Semantic Web area;
% Public procurement is a key sector of the EU economy accounting for about 16\% 
% of GDP~\footnote{\url{http://ec.europa.eu/internal\_market/publicprocurement/e-procurement\_en.htm}}. 
% The EU is seeking for modernising and opening up procurement markets across borders with the objective
% of improving the Europe’s competitiveness and for creating new opportunities for EU businesses.

The European Commission outlines the following advantages in the wider use of 
e-Procurement~\footnote{\url{http://ec.europa.eu/internal\_market/consultations/docs/2010/e-procurement/green-paper\_en.pdf}}: increased accessibility and transparency, benefits for individual procedures, benefits in terms of more efficient procurement administration
and potential for integration of EU procurement markets. TED\footnote{\url{http://ted.europa.eu/}} (`Tenders Electronic Daily') is
the online version of the `Supplement to the Official Journal of the European Union', dedicated to
European public procurement (1500 new procurement notices every day\footnote{\url{http://www.ted.europa.eu/TED/main/HomePage.do}}), but
a unified pan-European information system dealing with: 1) dispersion of the information; 2) duplication of the same notice in more than one
source; 3) different publishing formats; 4) problems regarding a multilingual environment and 5) aggregation of low-value procurement opportunities,
is missing.

On the other hand, in the European eGovernment context, there are several conceptual/terminological
maps of particular domains available in RAMON\footnote{\url{http://ec.europa.eu/eurostat/ramon}}, Eurostat's
metadata server: Health, Education, Employment or e-Procurement, among others.
The structure and features of these systems are very heterogeneous, although
some common aspects can be found in all of them: 1) hierarchical relationships between terms or concepts; 2) multilingual
character of the information. These knowledge organization systems (KOS) enable users to annotate information providing 
an agile mechanism for performing tasks such as exploration, searching, automatic classification or reasoning. 

Obviously one of the most interesting domains to apply the Linking Open Data (LOD) approach
is public procurement information published by governmental contracting authorities. In that sense, the growing commitment to
the reuse of public sector information (PSI) and initiatives like semantic web, LOD and the use of KOS provide building blocks
for an innovative unified pan-European information system for the benefit of SMEs.

%\subsection{Main Contributions}
\textbf{Main Contributions}

This work aims to apply the semantic web and LOD approaches to public procurement
notices: 1) Transforming government controlled vocabularies such as
CPV~\footnote{\url{http://europa.eu/legislation\_summaries/internal\_market/businesses/public\_procurement/l22008\_en.htm}},
CPC~\footnote{\url{http://unstats.un.org/unsd/cr/registry/isic-4.asp}}, 
Eurovoc\footnote{\url{http://eurovoc.europa.eu/}} (now available in SKOS),
etc. to RDF, RDF(S), SKOS or OWL; 2) Modeling the information inside the public procurement notices as web
information resources and enriching them with the aforementioned
controlled vocabularies, geographical information (e.g.\ NUTS\footnote{\url{http://epp.eurostat.ec.europa.eu/portal/page/portal/nuts\_nomenclature/introduction}}) and the
information now available in the linked data cloud; 3) Publishing the information in a SPARQL endpoint providing a ``linked data node'' and 4) Providing enhanced services (search and sort, matchmaking, georeasoning, statistics, etc.) 
exploiting this semantic information through ``advanced algorithms'' based on Spreading Activation (SA) techniques,
rule-based systems (RBS) and a mix of them. For example: \textit{Which public procurement
notices are relevant to Dutch companies (only SMEs) that want to tender for contracts announced
by local authorities with a total value lower than 170K \euro\mbox{ }to procure ``Transport and Related Services''
and a two-year duration in the Dutch-speaking region of Flanders (Belgium)?}

Finally, the methodology used to address these contributions and the PhD is based on
the STI Research Plan\footnote{\url{http://www.sti-innsbruck.at/uploads/media/STI\_Research\_Plan\_03.12.2008.pdf}}.

\section{Related work}
% FIXME: State of the art: describe relevant related work and point out areas that need to be improved;
In the scope of LOD and open government data (OGD) there are projects
trying to exploit the information of public procurement notices like
LOTED~\footnote{\url{http://loted.eu:8081/LOTED1Rep/}} (``Linked Open Tenders
Electronic Daily'') where they use the RSS feeds of TED. 
The UK government\footnote{\url{http://data.gov.uk}} is making a great effort to promote its information
sources using the LOD approach. They have published datasets
from different sectors: transport, defense, NUTS geographical information~\footnote{\url{http://nuts.psi.enakting.org/}}, etc. Most of the
public administrations in the different countries are also betting on the LOD
approach to make their information public: Spain (Aporta project~\footnote{\url{http://www.aporta.es/}}), USA\footnote{\url{http://www.data.gov/}}, etc. Regarding the use
of LOD and organizations, there is a new ontology for modeling the information
about organizations~\footnote{\url{http://www.epimorphics.com/web/category/category/developers/organization-ontology}} and
``The Open Database Of The Corporate World''~\footnote{\url{http://opencorporates.com/}} has recently been released.

Product Scheme Classifications (also known as PSCs) have been built to solve
specific problems of interoperability and communication in
e-commerce~\cite{Leukel-exchange}. The aim of a PSC is to be used as a \textit{de facto} standard by different agents
for information interchange in marketplaces~\cite{FenselOmel2001,DBLP:journals/tcci/Alor-HernandezAJPRMBG10}. Any PSC, as well as other classification
systems, can be interpreted as: 1) domain-ontologies~\cite{Hepp-possible} or 2) conceptual schemes~\cite{chemaEurovoc2008}
comprised of conceptual resources. Finally, Good Relations~\footnote{\url{http://www.heppnetz.de/projects/goodrelations/}} is an
ontology for e-commerce developed by Martin Hepp et al.

On the other hand, the main use of SA techniques is focused on
Document and Information Retrieval~\cite{Cohen1987}. These techniques have also been
used in semantic search based on hybrid approaches~\cite{RochaSA04,DBLP:conf/jckbse/BerruetaGP06}
and in user query expansion combining metadata and user information to
improve web data annotations. RBSs have long been used
for decision support, diagnosis, etc.\ in different fields. In the
semantic web area, and due to the emergence of OWL 2 RL, SPARQL Rules! and RIF, these
systems are growing in their use to deal with the web of data, but a clear
approach to mix datasets and RBSs is missing. They can also be applied to SA
techniques to handle the activation and propagation of the concepts.


\section{Proposed approach}
The proposed architecture, see Fig.~\ref{fig:arch}, is based on two main processes: 1) RDFizing. It is the process of transforming the data available in the
databases about public procurement notices from an XML intermediate format to RDF and enriching them with the vocabularies
of the linked data cloud. It also encodes the PSCs as linked data. 2) Enhanced services. It is the application of libraries such as
ONTOSPREAD~\footnote{\url{http://code.google.com/p/ontospread/}} and RIFle\footnote{\url{http://rifle.sf.net}} to exploit the linked database
and provide services to the customers.

\begin{figure}[h]
 \centering
    \includegraphics[width=5cm]{images/arch}
    \caption{Proposed Architecture Overview}
 \label{fig:arch}
\end{figure}

The combination of these building blocks seeks to create a new innovative way to exploit
the information included in public procurement notices in the context of the semantic
web and LOD initiatives reusing the existing technologies,
vocabularies, etc. The key points of this approach are summarized as follows:
1) existing PSCs, information about public procurement notices and organizations are published as linked data;
2) the application of SA techniques, RBSs and a mix of them to provide enhanced services adding value to the original information.

\section{Process to PhD-Results}
Currently we are finishing the process for publishing the PSCs and the information
extracted from public procurement notices as linked data. We are also trying to tune the SA techniques for working with rules and generating
SPARQL queries to be executed in a SPARQL endpoint. Finally, we are designing the experiment
to validate the quality and the improvement of the system with respect to existing systems.

In that sense, the experiment, apart from the selected service to be tested, depends on two main variables: 1) the amount of information
used and 2) the number of tests that should be carried out. Regarding the first variable,
1M public procurement notices (provided by Gateway SCS-Euroalert.net\footnote{\url{http://euroalert.net/}}) and over 320K organizations~\footnote{\url{ftp://ftp.ted.europa.eu/META-XML/}} are available. Regarding the second one,
we have not yet decided how many tests would be appropriate to provide a correct evaluation, but
the information about how many queries are requested per day in the existing public systems
can be a good indicator.

On the other hand, taking into account that the service of searching or matchmaking is the most relevant in this kind of system,
we are preparing a test suite with the aforementioned information (search queries and expected results) to
compare the precision and recall of existing public systems (free text and advanced key fields search of TED) to the
proposed one (LOD+SA+RBS+SPARQL). The expected result of this evaluation will validate our
approach for improving the access and retrieval of the information about public procurement notices
using the LOD approach.

\section{Conclusions and Future Work}
% FIXME: Conclusions and future work: conclude and specify the major items of future work.
The implementation of this work is expected to afford a new way to exploit
the information published inside public procurement notices applying advanced algorithms on LOD.
The advantages of this approach are the following: 1) reduction of the dispersion of the information; 2) unification of the data models and formats;
3) implicit support for multilingual and multicultural issues; 4) enrichment of the public procurement notices;
5) alignment with the Digital Agenda for Europe; 6) raising awareness of public procurement
opportunities among SMEs and 7) deployment of enhanced services on public procurement notices.
Regarding future work, the results of this study are intended to be exploited by a commercial service like Euroalert.net~\cite{web20,marin2010} and we
 are also interested in reporting the results to \textit{The Internal Market and Services Directorate General (DG MARKT) of the European Commission},
 \textit{The Information Society and Media Directorate General (DG INFSO) of the European Commission}, and the LOD and OGD initiatives, among others.



\bibliographystyle{plain}
%\bibliographystyle{unsrt}
%\bibliographystyle{acm}
\bibliography{references}
\renewcommand{\bibname}{References}
\end{document}

